{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Task\n",
    "\n",
    "This notebooks cleans the ECHP data for estimation of income processes. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "import pandas as pd "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_path = '../data_sources/echp/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_p = pd.read_stata(data_path+'a_w'+str(1)+'p.dta',convert_categoricals=False)\n",
    "df_p['pid'] = df_p['pid'].astype('int64')\n",
    "df_p.set_index(['country','hid','pid'],inplace=True)\n",
    "df_p = df_p.sort_index()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_r = pd.read_stata(data_path+'a_w'+str(1)+'rel.dta')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_h = pd.read_stata(data_path+'a_w'+str(1)+'h.dta',convert_categoricals=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>wave</th>\n",
       "      <th>pg001</th>\n",
       "      <th>pg002</th>\n",
       "      <th>pg003</th>\n",
       "      <th>pg004</th>\n",
       "      <th>pg005</th>\n",
       "      <th>pg006</th>\n",
       "      <th>pg007</th>\n",
       "      <th>pg008</th>\n",
       "      <th>pd001</th>\n",
       "      <th>...</th>\n",
       "      <th>pm007a</th>\n",
       "      <th>pm007b</th>\n",
       "      <th>pm007c</th>\n",
       "      <th>pm008</th>\n",
       "      <th>pm010</th>\n",
       "      <th>pm011</th>\n",
       "      <th>pk001</th>\n",
       "      <th>pk002</th>\n",
       "      <th>pk003</th>\n",
       "      <th>pk004</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>country</th>\n",
       "      <th>hid</th>\n",
       "      <th>pid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">1</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">101</th>\n",
       "      <th>1101</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.08041</td>\n",
       "      <td>1.08041</td>\n",
       "      <td>25</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>1</td>\n",
       "      <td>1962</td>\n",
       "      <td>...</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>1</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1102</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.08041</td>\n",
       "      <td>1.08041</td>\n",
       "      <td>25</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>1</td>\n",
       "      <td>1964</td>\n",
       "      <td>...</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>1</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">201</th>\n",
       "      <th>2102</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1.35196</td>\n",
       "      <td>1.35196</td>\n",
       "      <td>15</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>1</td>\n",
       "      <td>1936</td>\n",
       "      <td>...</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>1</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2103</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1.35196</td>\n",
       "      <td>1.35196</td>\n",
       "      <td>-9</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>3</td>\n",
       "      <td>1963</td>\n",
       "      <td>...</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>1</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2104</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1.35196</td>\n",
       "      <td>1.35196</td>\n",
       "      <td>10</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>3</td>\n",
       "      <td>1967</td>\n",
       "      <td>...</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>1</td>\n",
       "      <td>-8</td>\n",
       "      <td>-8</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 224 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  wave  pg001    pg002    pg003  pg004  pg005  pg006  pg007  \\\n",
       "country hid pid                                                               \n",
       "1       101 1101     1      1  1.08041  1.08041     25     -8     -8     -8   \n",
       "            1102     1      1  1.08041  1.08041     25     -8     -8     -8   \n",
       "        201 2102     1      2  1.35196  1.35196     15     -8     -8     -8   \n",
       "            2103     1      2  1.35196  1.35196     -9     -8     -8     -8   \n",
       "            2104     1      2  1.35196  1.35196     10     -8     -8     -8   \n",
       "\n",
       "                  pg008  pd001  ...  pm007a  pm007b  pm007c  pm008  pm010  \\\n",
       "country hid pid                 ...                                         \n",
       "1       101 1101      1   1962  ...      -8      -8      -8      1     -8   \n",
       "            1102      1   1964  ...      -8      -8      -8      1     -8   \n",
       "        201 2102      1   1936  ...      -8      -8      -8      1     -8   \n",
       "            2103      3   1963  ...      -8      -8      -8      1     -8   \n",
       "            2104      3   1967  ...      -8      -8      -8      1     -8   \n",
       "\n",
       "                  pm011  pk001  pk002  pk003  pk004  \n",
       "country hid pid                                      \n",
       "1       101 1101     -8      4      4      5      5  \n",
       "            1102     -8      5      1      6      4  \n",
       "        201 2102     -8      3      4      5      5  \n",
       "            2103     -8      3      2      3      2  \n",
       "            2104     -8      5      3      4      2  \n",
       "\n",
       "[5 rows x 224 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_p.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "waves = [x for x in range(1,9)]\n",
    "df_s = []\n",
    "pvars = ['wave','hid','pd001','pd003','pd004','pd005','pi100','pi110','pi211m','pi211mg', 'pt022','country','pe001','pe032','pg002','pid']\n",
    "for w in waves:\n",
    "\t# person file\n",
    "\tdf_p = pd.read_stata(data_path+'a_w'+str(w)+'p.dta',convert_categoricals=False)\n",
    "\tdf_p = df_p[pvars]\n",
    "\tdf_p['pid'] = df_p['pid'].astype('int64')\n",
    "\tdf_p = df_p.drop_duplicates(subset=['country','hid','pid'],keep='last')\n",
    "\tdf_p.set_index(['country','hid','pid'],inplace=True)\n",
    "\tdf_p = df_p.sort_index()\t\n",
    "\t# relations\n",
    "\tdf_r = pd.read_stata(data_path+'a_w'+str(w)+'rel.dta',convert_categoricals=False)\n",
    "\tdf_r = df_r.rename({'pid1':'pid'},axis=1)\n",
    "\tdf_r['pid'] = df_r['pid'].astype('int64')\n",
    "\tdf_r = df_r[['country','hid','pid','relation']]\n",
    "\tdf_r = df_r[df_r['relation'].isin([0,1])]\n",
    "\tdf_r = df_r.drop_duplicates(subset=['country','hid','pid'],keep='last')\n",
    "\tdf_r.set_index(['country','hid','pid'],inplace=True)\n",
    "\tdf = df_r.merge(df_p,left_index=True,right_index=True,how='left')\n",
    "\t# household file\n",
    "\tdf_h = pd.read_stata(data_path+'a_w'+str(w)+'h.dta',convert_categoricals=False)\n",
    "\tdf_h = df_h[['hid','country','hi100']]\n",
    "\tdf_h.set_index(['hid','country'],inplace=True)\n",
    "\tdf_h = df_h.sort_index()\n",
    "\t# merging \n",
    "\tdf_m = df.reset_index().merge(df_h,left_on=['hid','country'],right_on=['hid','country'],how='left')\n",
    "\tdf_m = df_m.reset_index()\n",
    "\tdf_m.set_index(['country','hid','pid'],inplace=True)\n",
    "\tdf_s.append(df_m)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df_s[0]\n",
    "for d in df_s[1:]:\n",
    "\tdf = df.append(d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "497001"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Age"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.rename({'pd003':'age'},axis=1)\n",
    "df = df[df['age']>=0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Income $Y$. We use total net (after tax) household income. \n",
    "\n",
    "HI100 TOTAL NET HOUSEHOLD INCOME (DETAILED, NC, TOTAL YEAR PRIOR TO THE SURVEY)\n",
    "\n",
    "In Italy, currency in liras, but in thousands"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.rename({'hi100':'Y'},axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Drop countries we do not need. Sweden missing because NA. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df[df.index.get_level_values(0).isin([51,2,3,6,9,11])]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Map names of countries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "co_map = {51:'DE',2:'DK',3:'NL',6:'FR',9:'IT',11:'SP'}\n",
    "df['co'] = df.index.get_level_values(0).to_list()\n",
    "df['co'] = df['co'].replace(co_map)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Only keep dominants in households"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df[df['relation']==0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a year variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['year'] = df['wave'].replace({1:1994,2:1995,3:1996,4:1997,5:1998,6:1999,7:2000,8:2021})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Keep variables of interest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df[['wave','year','age','co','Y']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We will use mostly logs to estimate covariance structures. Therefore, we will normalize incomes using means. \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df[df['Y']>0]\n",
    "for c in ['DE','DK','NL','FR','IT','SP']:\n",
    "    df.loc[df.co==c,'Y'] = df.loc[df.co==c,'Y']/df.loc[df.co==c,'Y'].mean()\n",
    "    up = df.loc[df.co==c,'Y'].quantile(0.99)\n",
    "    low = df.loc[df.co==c,'Y'].quantile(0.01)\n",
    "    df.loc[df.co==c,'Y'] = df.loc[df.co==c,'Y'].clip(low,up)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['logY'] = np.where((df['Y'].isna()==False),np.log(df['Y']),np.nan)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    55639.000000\n",
       "mean         0.981580\n",
       "std          0.583385\n",
       "min          0.012390\n",
       "25%          0.602519\n",
       "50%          0.856327\n",
       "75%          1.217810\n",
       "max          4.053713\n",
       "Name: Y, dtype: float64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Y'].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Simple means by country and year. Still in national currencies. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>co</th>\n",
       "      <th>DE</th>\n",
       "      <th>DK</th>\n",
       "      <th>FR</th>\n",
       "      <th>IT</th>\n",
       "      <th>NL</th>\n",
       "      <th>SP</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>year</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1994.0</th>\n",
       "      <td>0.921495</td>\n",
       "      <td>0.902370</td>\n",
       "      <td>0.898346</td>\n",
       "      <td>0.838978</td>\n",
       "      <td>0.883458</td>\n",
       "      <td>0.836638</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1995.0</th>\n",
       "      <td>0.910762</td>\n",
       "      <td>0.925969</td>\n",
       "      <td>0.927055</td>\n",
       "      <td>0.879379</td>\n",
       "      <td>0.863062</td>\n",
       "      <td>0.884825</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996.0</th>\n",
       "      <td>0.940447</td>\n",
       "      <td>0.935613</td>\n",
       "      <td>0.949999</td>\n",
       "      <td>0.934095</td>\n",
       "      <td>0.891700</td>\n",
       "      <td>0.956648</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1997.0</th>\n",
       "      <td>0.975825</td>\n",
       "      <td>0.983764</td>\n",
       "      <td>0.978557</td>\n",
       "      <td>0.945717</td>\n",
       "      <td>0.964159</td>\n",
       "      <td>0.965695</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1998.0</th>\n",
       "      <td>0.985468</td>\n",
       "      <td>1.001260</td>\n",
       "      <td>1.004682</td>\n",
       "      <td>1.017363</td>\n",
       "      <td>0.990828</td>\n",
       "      <td>1.002173</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1999.0</th>\n",
       "      <td>1.006344</td>\n",
       "      <td>1.032799</td>\n",
       "      <td>1.020241</td>\n",
       "      <td>1.054141</td>\n",
       "      <td>1.052355</td>\n",
       "      <td>1.060049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2000.0</th>\n",
       "      <td>1.051998</td>\n",
       "      <td>1.061560</td>\n",
       "      <td>1.056058</td>\n",
       "      <td>1.085038</td>\n",
       "      <td>1.094258</td>\n",
       "      <td>1.088448</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021.0</th>\n",
       "      <td>1.074947</td>\n",
       "      <td>1.093770</td>\n",
       "      <td>1.075113</td>\n",
       "      <td>1.086907</td>\n",
       "      <td>1.098574</td>\n",
       "      <td>1.142509</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "co            DE        DK        FR        IT        NL        SP\n",
       "year                                                              \n",
       "1994.0  0.921495  0.902370  0.898346  0.838978  0.883458  0.836638\n",
       "1995.0  0.910762  0.925969  0.927055  0.879379  0.863062  0.884825\n",
       "1996.0  0.940447  0.935613  0.949999  0.934095  0.891700  0.956648\n",
       "1997.0  0.975825  0.983764  0.978557  0.945717  0.964159  0.965695\n",
       "1998.0  0.985468  1.001260  1.004682  1.017363  0.990828  1.002173\n",
       "1999.0  1.006344  1.032799  1.020241  1.054141  1.052355  1.060049\n",
       "2000.0  1.051998  1.061560  1.056058  1.085038  1.094258  1.088448\n",
       "2021.0  1.074947  1.093770  1.075113  1.086907  1.098574  1.142509"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(['year','co']).mean()['Y'].unstack()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A look at what the dataframe looks like"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>wave</th>\n",
       "      <th>year</th>\n",
       "      <th>age</th>\n",
       "      <th>co</th>\n",
       "      <th>Y</th>\n",
       "      <th>logY</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>country</th>\n",
       "      <th>hid</th>\n",
       "      <th>pid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2</th>\n",
       "      <th>7601</th>\n",
       "      <th>76101</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1994.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>DK</td>\n",
       "      <td>0.827465</td>\n",
       "      <td>-0.189389</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8401</th>\n",
       "      <th>84101</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1994.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>DK</td>\n",
       "      <td>1.079543</td>\n",
       "      <td>0.076538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10701</th>\n",
       "      <th>107101</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1994.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>DK</td>\n",
       "      <td>0.767751</td>\n",
       "      <td>-0.264290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16501</th>\n",
       "      <th>165101</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1994.0</td>\n",
       "      <td>46.0</td>\n",
       "      <td>DK</td>\n",
       "      <td>1.332184</td>\n",
       "      <td>0.286820</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19901</th>\n",
       "      <th>199101</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1994.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>DK</td>\n",
       "      <td>0.818934</td>\n",
       "      <td>-0.199751</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      wave    year   age  co         Y      logY\n",
       "country hid   pid                                               \n",
       "2       7601  76101    1.0  1994.0  42.0  DK  0.827465 -0.189389\n",
       "        8401  84101    1.0  1994.0  70.0  DK  1.079543  0.076538\n",
       "        10701 107101   1.0  1994.0  22.0  DK  0.767751 -0.264290\n",
       "        16501 165101   1.0  1994.0  46.0  DK  1.332184  0.286820\n",
       "        19901 199101   1.0  1994.0  23.0  DK  0.818934 -0.199751"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Saving the output file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_pickle(data_path+'echp_incomes.pkl')\n"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "cf2a50979671a58939829e6829efb726aa5da11149213b77bd50351f899d04fb"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
